# -*- coding: utf-8 -*-
import scrapy
import re
from novel_crawl.items import NovelCrawlItem,NovelCrawDatailsItem

class NovelSpider(scrapy.Spider):
    """Spider for www.boquge.com.

    Starting from a novel's index page, yields one NovelCrawlItem with the
    book's metadata, then follows every chapter link and yields one
    NovelCrawDatailsItem per chapter.
    """
    name = 'novel'
    allowed_domains = ['www.boquge.com']
    start_urls = ['https://www.boquge.com/book/29536/']

    # Chapter entries that are author announcements, not story content;
    # they are skipped instead of being deleted from parallel lists, which
    # avoids the index-shift bug of deleting by stale indices.
    _SKIP_TITLES = frozenset({"上架感言", "喜讯，小仙仙诞生了"})

    def parse(self, response):
        """Parse the novel index page.

        Yields the book-level NovelCrawlItem, then one Request per real
        chapter, handled by :meth:`details`.
        """
        novelcrawl = NovelCrawlItem()
        # `or ''` guards against extract_first() returning None when a
        # selector matches nothing (the original crashed with AttributeError).
        novelcrawl['title'] = (response.css('.list-group h1::text').extract_first() or '').strip()
        novelcrawl['author'] = (response.css(".list-group h1 small::text").extract_first() or '').strip()
        novelcrawl['new_chapter'] = (response.css(".list-group .col-xs-12.list-group-item.no-border a::text").extract_first() or '').strip()
        novelcrawl['simple_introductio'] = (response.css("#all::text").extract_first() or '').strip()
        novelcrawl['volume'] = (response.css(".volumn::text").extract_first() or '').strip()
        novelcrawl['chapter_id'] = ""
        # The original built this item but never yielded it, so the book
        # metadata was silently dropped.
        yield novelcrawl

        # First <li> is a header row, hence the [1:] slice (as in the original).
        for row in response.css('#chapters-list li')[1:]:
            title = row.css("a::text").extract_first()
            href = row.css("a::attr(href)").extract_first()
            if not href or title in self._SKIP_TITLES:
                continue
            # urljoin makes this robust to relative chapter hrefs.
            yield scrapy.Request(response.urljoin(href), dont_filter=True,
                                 callback=self.details)

    def details(self, response):
        """Parse a single chapter page into a NovelCrawDatailsItem.

        The novel id and chapter id are the first two digit runs in the
        chapter URL (e.g. /book/<novel_id>/<chapter_id>.html).
        """
        # Original pattern ".*?(\d+).*?\(d+).*?" had an unbalanced ')' and
        # raised re.error on every call; this is the intended pattern.
        ids = re.search(r"(\d+).*?(\d+)", response.url)
        if ids is None:
            # Unexpected URL shape: nothing sensible to emit.
            return
        noveldatails = NovelCrawDatailsItem()
        noveldatails['novel_id'] = ids.group(1)
        noveldatails['datails_id'] = ids.group(2)
        noveldatails['title'] = response.css("#h1 h1::text").extract_first()
        noveldatails['content'] = response.css('#txtContent').extract()
        # The original never yielded the item, so chapter data was lost.
        yield noveldatails




            
